{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "run_rebiber.ipynb",
      "provenance": [],
      "toc_visible": true,
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# A simple web demo of [Rebiber](https://github.com/yuchenlin/rebiber).\n",
        "\n",
        "We often cite papers using their arXiv versions without noting that they are already PUBLISHED in some conferences. These unofficial bib entries might violate rules about submissions or camera-ready versions for some conferences. We introduce Rebiber, a simple tool in Python to fix them automatically. It is based on the official conference information from the DBLP or the ACL anthology (for NLP conferences)! You can check the list of [supported conferences](https://github.com/yuchenlin/rebiber#supported-conferences)."
      ],
      "metadata": {
        "id": "MwIUXwYdSx6d"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Step 1: Install Rebiber."
      ],
      "metadata": {
        "id": "zrgdP9bieBNe"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "%%capture\n",
        "# Install (or upgrade to) the latest rebiber release from PyPI.\n",
        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
        "# You may need to restart the runtime after an upgrade.\n",
        "%pip install rebiber -U\n",
        "# To try the development version from GitHub instead, run:\n",
        "# %pip install -e git+https://github.com/yuchenlin/rebiber.git#egg=rebiber -U"
      ],
      "metadata": {
        "id": "YECNPXcU0pKk"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Step 2: Import Bib."
      ],
      "metadata": {
        "id": "v_rJm39eO5VB"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## by Uploading"
      ],
      "metadata": {
        "id": "nv_-zfLX2H8x"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Run this cell, then click the upload button to select your bib file(s).\n",
        "from google.colab import files\n",
        "\n",
        "uploaded = files.upload()\n",
        "for fn, content in uploaded.items():\n",
        "  print('User uploaded file \"{name}\" with length {length} bytes'.format(\n",
        "      name=fn, length=len(content)))\n",
        "ls_bibfiles = list(uploaded)\n",
        "\n",
        "# Report, for each uploaded file, how many entries rebiber loads from it.\n",
        "import rebiber\n",
        "for fn in ls_bibfiles:\n",
        "  bib_entries = rebiber.load_bib_file(fn)\n",
        "  print(\"filename: {}; # of entries: {}\".format(fn, len(bib_entries)))"
      ],
      "metadata": {
        "id": "5yGuO6Hs0ppt"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## by Pasting"
      ],
      "metadata": {
        "id": "_D-bXvsBeXj_"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Paste one entry, or several consecutive entries, between the triple quotes below.\n",
        "# `re` is imported here so the cell runs on a fresh kernel.\n",
        "import re\n",
        "\n",
        "ls_bib = []\n",
        "s = \"\"\"\n",
        "@Misc{zaheer2021big,\n",
        "  author        = {Manzil Zaheer and Guru Guruganesh and Avinava Dubey and Joshua Ainslie and Chris Alberti and Santiago Ontanon and Philip Pham and Anirudh Ravula and Qifan Wang and Li Yang and Amr Ahmed},\n",
        "  title         = {Big Bird: Transformers for Longer Sequences},\n",
        "  year          = {2021},\n",
        "  archiveprefix = {arXiv},\n",
        "  eprint        = {2007.14062},\n",
        "  file          = {:Ref/2007.14062.pdf:PDF},\n",
        "  primaryclass  = {cs.LG},\n",
        "}\n",
        "\"\"\"\n",
        "\n",
        "ls_bib.append(s)\n",
        "\n",
        "# The input file written here exists only in this Colab session.\n",
        "fp_input = \"input_pasted.bib\"\n",
        "\n",
        "# Remove the whitespace that follows each newline (indentation, blank lines) so that\n",
        "# every line starts at column 0, and end each pasted block with a blank line.\n",
        "# The with-block closes the file even if a write fails.\n",
        "with open(fp_input, \"w\") as input_bib:\n",
        "  for bib in ls_bib:\n",
        "    input_bib.write('\\n'.join(re.split(r'\\n\\s+', bib)) + \"\\n\\n\")\n",
        "\n",
        "# Downstream cells process every file listed here.\n",
        "ls_bibfiles = [fp_input]"
      ],
      "metadata": {
        "id": "RjAhRJiSCExK"
      },
      "execution_count": 105,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Step 3: PROCESS and PRINT"
      ],
      "metadata": {
        "id": "NR8cNLG9h9B0"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Run rebiber on every input bib file, then print the rewritten entries.\n",
        "import subprocess\n",
        "\n",
        "BIB_SUFFIX = '.bib'\n",
        "\n",
        "ls_output = []\n",
        "for fn in ls_bibfiles:\n",
        "  # Drop only a trailing '.bib'; fn.split('.bib')[0] would cut a name such as\n",
        "  # 'my.bibliography.bib' down to 'my'.\n",
        "  stem = fn[:-len(BIB_SUFFIX)] if fn.endswith(BIB_SUFFIX) else fn\n",
        "  fn_output = '{}_rebibed.bib'.format(stem)\n",
        "  ls_output.append(fn_output)\n",
        "  # An argument list (no shell) keeps file names containing spaces or parentheses,\n",
        "  # e.g. 'refs (1).bib', intact. Output is captured so the cell stays quiet on success.\n",
        "  result = subprocess.run(['rebiber', '-i', fn, '-o', fn_output],\n",
        "                          capture_output=True, text=True)\n",
        "  if result.returncode != 0:\n",
        "    # Surface the failure instead of discarding it with the captured output.\n",
        "    print('rebiber failed on \"{}\":\\n{}'.format(fn, result.stderr))\n",
        "\n",
        "# Print bib entries\n",
        "for fn in ls_output:\n",
        "  with open(fn, 'r') as f:\n",
        "    rebibed_bib = f.read()\n",
        "  print(rebibed_bib)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "GylOmxE49Vom",
        "outputId": "0896d49c-9e4b-486a-fffa-4013fc44f5f2"
      },
      "execution_count": 106,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "@misc{zaheer2021big,\n",
            " archiveprefix = {arXiv},\n",
            " author = {Manzil Zaheer and Guru Guruganesh and Avinava Dubey and Joshua Ainslie and Chris Alberti and Santiago Ontanon and Philip Pham and Anirudh Ravula and Qifan Wang and Li Yang and Amr Ahmed},\n",
            " eprint = {2007.14062},\n",
            " file = {:Ref/2007.14062.pdf:PDF},\n",
            " primaryclass = {cs.LG},\n",
            " title = {Big Bird: Transformers for Longer Sequences},\n",
            " year = {2021}\n",
            "}\n",
            "\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Step 4: DOWNLOAD"
      ],
      "metadata": {
        "id": "VKeFQDfYmmJq"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Run this cell to download the processed outputs.\n",
        "# Imported here so the cell also works when the bib was pasted and the\n",
        "# upload cell (the only other place `files` is imported) was never run.\n",
        "from google.colab import files\n",
        "\n",
        "for fn in ls_output:\n",
        "  files.download(fn)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 16
        },
        "id": "LiNQMjYaZOIU",
        "outputId": "87074599-35b7-43e6-97b0-81584795a2dc"
      },
      "execution_count": 107,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/javascript": [
              "\n",
              "    async function download(id, filename, size) {\n",
              "      if (!google.colab.kernel.accessAllowed) {\n",
              "        return;\n",
              "      }\n",
              "      const div = document.createElement('div');\n",
              "      const label = document.createElement('label');\n",
              "      label.textContent = `Downloading \"${filename}\": `;\n",
              "      div.appendChild(label);\n",
              "      const progress = document.createElement('progress');\n",
              "      progress.max = size;\n",
              "      div.appendChild(progress);\n",
              "      document.body.appendChild(div);\n",
              "\n",
              "      const buffers = [];\n",
              "      let downloaded = 0;\n",
              "\n",
              "      const channel = await google.colab.kernel.comms.open(id);\n",
              "      // Send a message to notify the kernel that we're ready.\n",
              "      channel.send({})\n",
              "\n",
              "      for await (const message of channel.messages) {\n",
              "        // Send a message to notify the kernel that we're ready.\n",
              "        channel.send({})\n",
              "        if (message.buffers) {\n",
              "          for (const buffer of message.buffers) {\n",
              "            buffers.push(buffer);\n",
              "            downloaded += buffer.byteLength;\n",
              "            progress.value = downloaded;\n",
              "          }\n",
              "        }\n",
              "      }\n",
              "      const blob = new Blob(buffers, {type: 'application/binary'});\n",
              "      const a = document.createElement('a');\n",
              "      a.href = window.URL.createObjectURL(blob);\n",
              "      a.download = filename;\n",
              "      div.appendChild(a);\n",
              "      a.click();\n",
              "      div.remove();\n",
              "    }\n",
              "  "
            ],
            "text/plain": [
              "<IPython.core.display.Javascript object>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "application/javascript": [
              "download(\"download_9198d73a-970d-4def-8c4b-089f29284bd9\", \"input_pasted_rebibed.bib\", 404)"
            ],
            "text/plain": [
              "<IPython.core.display.Javascript object>"
            ]
          },
          "metadata": {}
        }
      ]
    }
  ]
}